Morphomics Report
Load Data
dataDir="d:\\workSync\\HaichunYang\\202306_podocyteMorphomics\\202310_data\\"
setwd(dataDir)
files=list.files(pattern="*.csv")
samples=gsub("_.*","",files)
samples=gsub("6793-","",samples)
metaTable=data.frame(Sample=samples,File=files,stringsAsFactors=FALSE)
#celltypes=sapply(strsplit(files,"_"),function(x) x[2])
#metaTable=data.frame(Sample=samples,File=files,CellType=celltypes,stringsAsFactors=FALSE)
featureDataAll=ReadMorphomicFeatures(metaTable)
#add celltype column
featureDataAll[["SampleData"]]$CellType=sapply(strsplit(featureDataAll[["SampleData"]]$FileName,"_"),function(x) x[1])Data Transformation
#temp=DataSelection(featureDataAll,featureType=c("AreaShape","Intensity"))
featureDataAllNormlized=MorphomicFeaturesNormlization(featureDataAll)## [1] "Remove variables less than 10 unique values"
## [1] "23 features removed"
## [1] "Number of features after selection:"
##
## AreaShape Granularity Intensity Neighbors RadialDistribution Texture
## 104 3 15 6 67 52
## [1] "Remove variables with NA"
## [1] "3 features removed"
## [1] "AngleBetweenNeighbors_Expanded;FirstClosestDistance_Expanded;SecondClosestDistance_Expanded"
## [1] "Number of features after selection:"
##
## AreaShape Granularity Intensity Neighbors RadialDistribution Texture
## 104 3 15 3 67 52
## [1] "Remove variables less than 2% subjects with unique values"
## [1] "Normalization done"
Data Describe
## Warning: package 'Hmisc' was built under R version 4.2.3
Raw
Transformation summary
##
## BoxCox (1) Boxcox trans.(2) Log trans. Raw data Raw due to fail in Boxcox trans.(2)
## 13 16 92 121 2
PCA: Propotion of Variance Explained
By Feature Type
pList=list()
for (reductionName in c("AreaShape","Granularity" ,"Intensity","Neighbors", "RadialDistribution" ,"Texture" )){
pList[[reductionName]]=plotPcaBarplot(featureDataAllNormlized,reductionName=paste0(reductionName,"_PCA"))+ggtitle(reductionName)
}## Warning: package 'viridis' was built under R version 4.2.3
## Warning: package 'viridisLite' was built under R version 4.2.3
PCA: Correlation of PCs in different cell
By Feature Type
# names(featureDataAllNormlized[["Reductions"]])
#
# reductionName="Texture"
# temp1=featureDataAllNormlized[["Reductions"]][[reductionName]]$cell.embeddings[,3]
#
# reductionName="podocyte_Texture"
# temp2=featureDataAllNormlized[["Reductions"]][[reductionName]]$cell.embeddings[,3]
plotPCsByCellType(featureDataAllNormlized,reductionName="AreaShape_PCA")PCA: Loading of PCs in different cell
Feature Correlation Heatmap
sparsePCA analysis
Perform sparsePCA based on PCA results
## Warning: package 'sparsepca' was built under R version 4.2.3
temp=featureDataAllNormlized
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=30,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$ALL_SparsePCA=resultSparsePCA
featureType="AreaShape"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)## [1] "AreaShape were selected as featureType"
## [1] "Number of features after selection:"
##
## AreaShape
## 104
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=10,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$AreaShape_SparsePCA=resultSparsePCA
featureType="Intensity"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)## [1] "Intensity were selected as featureType"
## [1] "Number of features after selection:"
##
## Intensity
## 15
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=4,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$Intensity_SparsePCA=resultSparsePCA
featureType="Neighbors"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)## [1] "Neighbors were selected as featureType"
## [1] "Number of features after selection:"
##
## Neighbors
## 3
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=4,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$Neighbors_SparsePCA=resultSparsePCA
featureType="RadialDistribution"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)## [1] "RadialDistribution were selected as featureType"
## [1] "Number of features after selection:"
##
## RadialDistribution
## 67
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=10,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$RadialDistribution_SparsePCA=resultSparsePCA
featureType="Texture"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)## [1] "Texture were selected as featureType"
## [1] "Number of features after selection:"
##
## Texture
## 52
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=4,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$Texture_SparsePCA=resultSparsePCA
featureType="Granularity"
temp=DataSelection(featureDataAllNormlized,featureType=featureType)## [1] "Granularity were selected as featureType"
## [1] "Number of features after selection:"
##
## Granularity
## 3
resultSparsePCA=featureDataSparsePca(temp,dataName="FeatureDataNormlized",k=4,alpha = 1e-03)
featureDataAllNormlized[["Reductions"]]$Granularity_SparsePCA=resultSparsePCA
#
# dataForsPCA=temp[["FeatureDataNormlized"]]
# set.seed(123)
# rspca.results <- rspca((dataForsPCA), k=10, verbose=TRUE, max_iter=1000,center=TRUE, scale=TRUE)
# row.names(rspca.results$loadings)=colnames(dataForsPCA)
# head(rspca.results$loadings)
#
# summary(rspca.results)
#
#
# featureType="Intensity"
# temp=DataSelection(featureDataAllNormlized,featureType=featureType)
# dataForsPCA=temp[["FeatureDataNormlized"]]
#
# rspca.results <- rspca((dataForsPCA), k=5, verbose=TRUE, max_iter=1000,center=TRUE, scale=TRUE,alpha = 1e-03)
# row.names(rspca.results$loadings)=colnames(dataForsPCA)
# colnames(rspca.results$loadings)=paste0("PC",1:ncol(rspca.results$loadings))
#
# head(rspca.results$loadings)
# #how many features for each PC
# apply(rspca.results$loadings,2,function(x) length(which(x!=0))/length(x))
# #Existing in how many PCs for each feature
# table(apply(rspca.results$loadings,1,function(x) length(which(x!=0))))
#
# summary(rspca.results)
# temp=rspca.results$loadings
# temp[temp==0]=NA
# ComplexHeatmap::Heatmap(temp,cluster_columns = FALSE,cluster_rows = FALSE)sparsePCA loading Vis
for (reduction in c("ALL_SparsePCA","AreaShape_SparsePCA","RadialDistribution_SparsePCA")) {
plot(plotPCsLoading(featureDataAllNormlized[["Reductions"]][[reduction]]$feature.loadings[,1:9])+ggtitle(reduction))
}for (reduction in c("Intensity_SparsePCA","Neighbors_SparsePCA","Texture_SparsePCA","Granularity_SparsePCA")) {
plot(plotPCsLoading(featureDataAllNormlized[["Reductions"]][[reduction]]$feature.loadings[,1:3])+ggtitle(reduction))
}# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$AreaShape_SparsePCA$feature.loadings[,1:9])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$RadialDistribution_SparsePCA$feature.loadings[,1:9])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$Intensity_SparsePCA$feature.loadings[,1:4])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$Neighbors_SparsePCA$feature.loadings[,1:4])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$Texture_SparsePCA$feature.loadings[,1:4])
# plotPCsLoading(featureDataAllNormlized[["Reductions"]]$Granularity_SparsePCA$feature.loadings[,1:4])Differential detection
Diff By cell type in all samples
testResult=testFeatureDiff(featureDataAllNormlized,dataName="AreaShape_PCA",groupColumn="CellType")
knitr::kable(head(testResult))| pValue | mesangial | PEC | podocyte | pAdj | |
|---|---|---|---|---|---|
| PC1 | 0.0000002 | -1.0709680 | 1.3836765 | -0.2211795 | 0.0000079 |
| PC2 | 0.0000000 | -1.7483061 | 1.2918140 | 0.7335462 | 0.0000000 |
| PC3 | 0.3333083 | 0.1845570 | -0.2341403 | 0.0332326 | 0.4812937 |
| PC4 | 0.0015975 | 0.0359318 | -0.3347513 | 0.3337718 | 0.0151033 |
| PC5 | 0.1260320 | -0.0398208 | 0.1939320 | -0.1695555 | 0.2912740 |
| PC6 | 0.1461189 | 0.0157159 | 0.1556479 | -0.1960573 | 0.3233268 |
plotFeatureByGroup(featureDataAllNormlized,dataName="AreaShape_PCA",featureNames=c("PC1","PC2","PC4","PC5"),groupColumn="CellType")Diff By Sample in one cell type
[1] “mesangial were selected as CellType” [1] “Number of Sample/CellType after selection:”
mesangial
AF-10 93 AF-7 81 AF-8 52 AF-9 88
testResult=testFeatureDiff(temp,dataName="AreaShape_PCA",groupColumn="Sample")
knitr::kable(head(testResult))| pValue | AF-10 | AF-7 | AF-8 | AF-9 | pAdj | |
|---|---|---|---|---|---|---|
| PC1 | 0.0232520 | -1.0038110 | -1.5160756 | 0.5604785 | -1.6962761 | 0.2969516 |
| PC2 | 0.0140227 | -1.2655242 | -2.4173712 | -1.5701714 | -1.7479360 | 0.2916714 |
| PC3 | 0.7268383 | 0.0880016 | 0.2199501 | 0.4413957 | 0.1022524 | 0.8688641 |
| PC4 | 0.3827271 | -0.1403118 | -0.1022275 | 0.2612373 | 0.2162236 | 0.7196790 |
| PC5 | 0.2570135 | 0.0164711 | 0.1119739 | 0.0826852 | -0.3114212 | 0.6591459 |
| PC6 | 0.0172450 | -0.3151867 | 0.0401784 | 0.4218819 | 0.1028959 | 0.2934276 |
#extract cells with largest PC2 and smallest PC2
cellEmbeddingsTable=temp[["Reductions"]][["AreaShape_PCA"]]$cell.embeddings[row.names(temp[["SampleData"]]),]
temp1=cellEmbeddingsTable[head(order(cellEmbeddingsTable[,"PC2"])),][,1:3]
temp2=cellEmbeddingsTable[tail(order(cellEmbeddingsTable[,"PC2"])),][,1:3]Diff By Group in each cell type
featureDataAllNormlized[["SampleData"]]$Group=ifelse(featureDataAllNormlized[["SampleData"]]$Sample %in% c("AF-7","AF-8"),"Sample78","Sample910")
allCellTypes=unique(featureDataAllNormlized[["SampleData"]]$CellType)
testResultList=list()
for (cellType in allCellTypes) {
temp=DataSelection(featureDataAllNormlized,CellType=cellType)
testResult=testFeatureDiff(temp,dataName="AreaShape_PCA",groupColumn="Group")
#knitr::kable(head(testResult))
testResultList[[cellType]]=testResult
}[1] “PEC were selected as CellType” [1] “Number of Sample/CellType after selection:”
PEC
AF-10 85 AF-7 68 AF-8 50 AF-9 80 [1] “mesangial were selected as CellType” [1] “Number of Sample/CellType after selection:”
mesangial
AF-10 93 AF-7 81 AF-8 52 AF-9 88 [1] “podocyte were selected as CellType” [1] “Number of Sample/CellType after selection:”
podocyte
AF-10 84 AF-7 59 AF-8 31 AF-9 76
for (cellType in names(testResultList)) {
print(knitr::kable(head(testResultList[[cellType]]),caption = cellType))
}| pValue | Sample78 | Sample910 | pAdj | |
|---|---|---|---|---|
| PC1 | 0.1647764 | 1.9417594 | 0.9845627 | 0.9019341 |
| PC2 | 0.0245740 | 2.0304581 | 0.7635715 | 0.8518979 |
| PC3 | 0.3595083 | 0.0405176 | -0.4305623 | 0.9740970 |
| PC4 | 0.8446652 | -0.3158944 | -0.3482368 | 0.9847101 |
| PC5 | 0.6300036 | 0.2955662 | 0.1212481 | 0.9811349 |
| PC6 | 0.2848396 | 0.3101746 | 0.0451378 | 0.9740970 |
| pValue | Sample78 | Sample910 | pAdj | |
|---|---|---|---|---|
| PC1 | 0.2174119 | -0.7041898 | -1.3404791 | 0.6824032 |
| PC2 | 0.0446062 | -2.0861352 | -1.5000670 | 0.4079057 |
| PC3 | 0.3620851 | 0.3065303 | 0.0949302 | 0.7515257 |
| PC4 | 0.8125580 | 0.0398790 | 0.0330314 | 0.9707279 |
| PC5 | 0.1358766 | 0.1005227 | -0.1429462 | 0.5887988 |
| PC6 | 0.0264400 | 0.1894158 | -0.1119200 | 0.4079057 |
| pValue | Sample78 | Sample910 | pAdj | |
|---|---|---|---|---|
| PC1 | 0.0536558 | 0.7163934 | -0.7485642 | 0.6846849 |
| PC2 | 0.0454283 | 1.2657749 | 0.4341676 | 0.6846849 |
| PC3 | 0.6227424 | 0.1249957 | -0.0183841 | 0.9675981 |
| PC4 | 0.0550258 | -0.0586012 | 0.5544816 | 0.6846849 |
| PC5 | 0.8740448 | -0.0615449 | -0.2303114 | 0.9825623 |
| PC6 | 0.5909086 | -0.3399449 | -0.1151205 | 0.9675981 |
[1] “mesangial were selected as CellType” [1] “Number of Sample/CellType after selection:”
mesangial
AF-10 93 AF-7 81 AF-8 52 AF-9 88
[1] “PEC were selected as CellType” [1] “Number of Sample/CellType after selection:”
PEC
AF-10 85 AF-7 68 AF-8 50 AF-9 80
[1] “podocyte were selected as CellType” [1] “Number of Sample/CellType after selection:”
podocyte
AF-10 84 AF-7 59 AF-8 31 AF-9 76
plotFeatureByGroup(temp,dataName="AreaShape_PCA",featureNames=c("PC1","PC2","PC4"),groupColumn="Group")#PC2 is more related
#extract cells with largest PC2 and smallest PC2
cellEmbeddingsTable=featureDataAllNormlized[["Reductions"]][["AreaShape_PCA"]]$cell.embeddings
temp1=cellEmbeddingsTable[head(order(cellEmbeddingsTable[,"PC2"])),][,1:3]
temp2=cellEmbeddingsTable[tail(order(cellEmbeddingsTable[,"PC2"])),][,1:3]
#dput(row.names(temp1))
#dput(row.names(temp2))